package com.xzx.spark.core.exercise

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Per-province top-3 ad ranking.
 *
 * Each line of `agent.log` holds five space-separated fields:
 * timestamp, province, city, user, ad.
 * The job counts how often every ad was clicked within every province and prints,
 * for each province, the three ads with the most clicks.
 *
 * @author xinzhixuan
 * @version 1.0
 * @date 2021-08-06 9:24 PM
 */
object Exercise01 {

  /**
   * Runs the job on a local Spark master and prints one
   * `(province, List((ad, clicks), ...))` line per province to standard output.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Number of fields in a well-formed log line: timestamp, province, city, user, ad.
    val fieldCount = 5
    // How many of the most-clicked ads to keep per province.
    val topN = 3

    val sparkConf = new SparkConf()
      .setMaster("local[*]")
      // The runtime class of a Scala object is named "Exercise01$"; drop the trailing
      // '$' so the application is registered under a clean name.
      .setAppName(getClass.getSimpleName.stripSuffix("$"))
    val sc = new SparkContext(sparkConf)
    try {
      val topAdsByProvince = sc
        .textFile("src/main/scala/com/xzx/spark/core/exercise/agent.log")
        .map(_.split(" "))
        // Blank or truncated lines would otherwise abort the whole job with an
        // ArrayIndexOutOfBoundsException when fields 1 and 4 are read; skip them.
        .filter(_.length >= fieldCount)
        // Key by (province, ad) so that reduceByKey yields the click count per pair.
        .map(fields => ((fields(1), fields(4)), 1))
        .reduceByKey(_ + _)
        // Re-key by province only; pattern matching keeps the tuple reshaping readable.
        .map { case ((province, ad), clicks) => (province, (ad, clicks)) }
        .groupByKey()
        .mapValues { adClicks =>
          // Sort ascending by click count, then reverse to get the highest counts first.
          adClicks.toList.sortBy { case (_, clicks) => clicks }.reverse.take(topN)
        }

      // The result has one small entry per province, so it is safe to collect it and
      // print on the driver; RDD.foreach(println) would print from the executor tasks.
      topAdsByProvince.collect().foreach(println)
    } finally {
      // Always release the SparkContext, even when the job fails.
      sc.stop()
    }
  }
}
